#! /usr/bin/perl
#
# Copyright (c) 2001-2017, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
#
# Generate UTF-8 <--> EUC_JP code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain CP932.TXT and JIS0212.TXT from the
# organization's ftp site.

use strict;
use convutils;

my $this_script = $0;

# Load JIS0212.TXT
my $jis0212 = &read_source("JIS0212.TXT");

my @mapping;

foreach my $i (@$jis0212) {
	# We have a different mapping for this in the EUC_JP to UTF-8 direction.
	if ($i->{code} == 0x2243)
	{
		$i->{direction} = FROM_UNICODE;
	}

	if ($i->{code} == 0x2271)
	{
		$i->{direction} = TO_UNICODE;
	}

	if ($i->{ucs} >= 0x080)
	{
		$i->{code} = $i->{code} | 0x8f8080;
	}
	else
	{
		next;
	}

	push @mapping, $i;
}

# Load CP932.TXT.
my $ct932 = &read_source("CP932.TXT");

foreach my $i (@$ct932) {
	my $sjis = $i->{code};

	# We have a different mapping for this in the EUC_JP to UTF-8 direction.
	if ($sjis == 0xeefa ||
		$sjis == 0xeefb ||
		$sjis == 0xeefc)
	{
		next;
	}

	if ($sjis >= 0xa1)
	{
		my $jis = &sjis2jis($sjis);

		$i->{code} = $jis | ($jis < 0x100 ? 0x8e00 :
							 ($sjis >= 0xeffd  ? 0x8f8080 : 0x8080));

		# Remember the SJIS code for later.
		$i->{sjis} = $sjis;

		push @mapping, $i;
	}
}

foreach my $i (@mapping) {
	my $sjis = $i->{sjis};

	# These SJIS characters are excluded completely.
	if ($sjis >= 0xed00 && $sjis <= 0xeef9 ||
		$sjis >= 0xfa54 && $sjis <= 0xfa56 ||
		$sjis >= 0xfa58 && $sjis <= 0xfc4b)
	{
		$i->{direction} = NONE;
		next;
	}

	# These SJIS characters are only in the UTF-8 to EUC_JP table
	if ($sjis == 0xeefa || $sjis == 0xeefb || $sjis == 0xeefc)
	{
		$i->{direction} = FROM_UNICODE;
		next;
	}

	if ($sjis == 0x8790 || $sjis == 0x8791 || $sjis == 0x8792 ||
		$sjis == 0x8795 || $sjis == 0x8796 || $sjis == 0x8797 ||
		$sjis == 0x879a || $sjis == 0x879b || $sjis == 0x879c ||
		($sjis >= 0xfa4a && $sjis <= 0xfa53))
	{
		$i->{direction} = TO_UNICODE;
		next;
	}
}

push @mapping, (
	 {direction => BOTH, ucs => 0x4efc, code => 0x8ff4af, comment => '# CJK(4EFC)'},
	 {direction => BOTH, ucs => 0x50f4, code => 0x8ff4b0, comment => '# CJK(50F4)'},
	 {direction => BOTH, ucs => 0x51EC, code => 0x8ff4b1, comment => '# CJK(51EC)'},
	 {direction => BOTH, ucs => 0x5307, code => 0x8ff4b2, comment => '# CJK(5307)'},
	 {direction => BOTH, ucs => 0x5324, code => 0x8ff4b3, comment => '# CJK(5324)'},
	 {direction => BOTH, ucs => 0x548A, code => 0x8ff4b5, comment => '# CJK(548A)'},
	 {direction => BOTH, ucs => 0x5759, code => 0x8ff4b6, comment => '# CJK(5759)'},
	 {direction => BOTH, ucs => 0x589E, code => 0x8ff4b9, comment => '# CJK(589E)'},
	 {direction => BOTH, ucs => 0x5BEC, code => 0x8ff4ba, comment => '# CJK(5BEC)'},
	 {direction => BOTH, ucs => 0x5CF5, code => 0x8ff4bb, comment => '# CJK(5CF5)'},
	 {direction => BOTH, ucs => 0x5D53, code => 0x8ff4bc, comment => '# CJK(5D53)'},
	 {direction => BOTH, ucs => 0x5FB7, code => 0x8ff4be, comment => '# CJK(5FB7)'},
	 {direction => BOTH, ucs => 0x6085, code => 0x8ff4bf, comment => '# CJK(6085)'},
	 {direction => BOTH, ucs => 0x6120, code => 0x8ff4c0, comment => '# CJK(6120)'},
	 {direction => BOTH, ucs => 0x654E, code => 0x8ff4c1, comment => '# CJK(654E)'},
	 {direction => BOTH, ucs => 0x663B, code => 0x8ff4c2, comment => '# CJK(663B)'},
	 {direction => BOTH, ucs => 0x6665, code => 0x8ff4c3, comment => '# CJK(6665)'},
	 {direction => BOTH, ucs => 0x6801, code => 0x8ff4c6, comment => '# CJK(6801)'},
	 {direction => BOTH, ucs => 0x6A6B, code => 0x8ff4c9, comment => '# CJK(6A6B)'},
	 {direction => BOTH, ucs => 0x6AE2, code => 0x8ff4ca, comment => '# CJK(6AE2)'},
	 {direction => BOTH, ucs => 0x6DF2, code => 0x8ff4cc, comment => '# CJK(6DF2)'},
	 {direction => BOTH, ucs => 0x6DF8, code => 0x8ff4cb, comment => '# CJK(6DF8)'},
	 {direction => BOTH, ucs => 0x7028, code => 0x8ff4cd, comment => '# CJK(7028)'},
	 {direction => BOTH, ucs => 0x70BB, code => 0x8ff4ae, comment => '# CJK(70BB)'},
	 {direction => BOTH, ucs => 0x7501, code => 0x8ff4d0, comment => '# CJK(7501)'},
	 {direction => BOTH, ucs => 0x7682, code => 0x8ff4d1, comment => '# CJK(7682)'},
	 {direction => BOTH, ucs => 0x769E, code => 0x8ff4d2, comment => '# CJK(769E)'},
	 {direction => BOTH, ucs => 0x7930, code => 0x8ff4d4, comment => '# CJK(7930)'},
	 {direction => BOTH, ucs => 0x7AE7, code => 0x8ff4d9, comment => '# CJK(7AE7)'},
	 {direction => BOTH, ucs => 0x7DA0, code => 0x8ff4dc, comment => '# CJK(7DA0)'},
	 {direction => BOTH, ucs => 0x7DD6, code => 0x8ff4dd, comment => '# CJK(7DD6)'},
	 {direction => BOTH, ucs => 0x8362, code => 0x8ff4df, comment => '# CJK(8362)'},
	 {direction => BOTH, ucs => 0x85B0, code => 0x8ff4e1, comment => '# CJK(85B0)'},
	 {direction => BOTH, ucs => 0x8807, code => 0x8ff4e4, comment => '# CJK(8807)'},
	 {direction => BOTH, ucs => 0x8B7F, code => 0x8ff4e6, comment => '# CJK(8B7F)'},
	 {direction => BOTH, ucs => 0x8CF4, code => 0x8ff4e7, comment => '# CJK(8CF4)'},
	 {direction => BOTH, ucs => 0x8D76, code => 0x8ff4e8, comment => '# CJK(8D76)'},
	 {direction => BOTH, ucs => 0x90DE, code => 0x8ff4ec, comment => '# CJK(90DE)'},
	 {direction => BOTH, ucs => 0x9115, code => 0x8ff4ee, comment => '# CJK(9115)'},
	 {direction => BOTH, ucs => 0x9592, code => 0x8ff4f1, comment => '# CJK(9592)'},
	 {direction => BOTH, ucs => 0x973B, code => 0x8ff4f4, comment => '# CJK(973B)'},
	 {direction => BOTH, ucs => 0x974D, code => 0x8ff4f5, comment => '# CJK(974D)'},
	 {direction => BOTH, ucs => 0x9751, code => 0x8ff4f6, comment => '# CJK(9751)'},
	 {direction => BOTH, ucs => 0x999E, code => 0x8ff4fa, comment => '# CJK(999E)'},
	 {direction => BOTH, ucs => 0x9AD9, code => 0x8ff4fb, comment => '# CJK(9AD9)'},
	 {direction => BOTH, ucs => 0x9B72, code => 0x8ff4fc, comment => '# CJK(9B72)'},
	 {direction => BOTH, ucs => 0x9ED1, code => 0x8ff4fe, comment => '# CJK(9ED1)'},
	 {direction => BOTH, ucs => 0xF929, code => 0x8ff4c5, comment => '# CJK COMPATIBILITY IDEOGRAPH-F929'},
	 {direction => BOTH, ucs => 0xF9DC, code => 0x8ff4f2, comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC'},
	 {direction => BOTH, ucs => 0xFA0E, code => 0x8ff4b4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E'},
	 {direction => BOTH, ucs => 0xFA0F, code => 0x8ff4b7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F'},
	 {direction => BOTH, ucs => 0xFA10, code => 0x8ff4b8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10'},
	 {direction => BOTH, ucs => 0xFA11, code => 0x8ff4bd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11'},
	 {direction => BOTH, ucs => 0xFA12, code => 0x8ff4c4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12'},
	 {direction => BOTH, ucs => 0xFA13, code => 0x8ff4c7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13'},
	 {direction => BOTH, ucs => 0xFA14, code => 0x8ff4c8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14'},
	 {direction => BOTH, ucs => 0xFA15, code => 0x8ff4ce, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15'},
	 {direction => BOTH, ucs => 0xFA16, code => 0x8ff4cf, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16'},
	 {direction => BOTH, ucs => 0xFA17, code => 0x8ff4d3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17'},
	 {direction => BOTH, ucs => 0xFA18, code => 0x8ff4d5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18'},
	 {direction => BOTH, ucs => 0xFA19, code => 0x8ff4d6, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19'},
	 {direction => BOTH, ucs => 0xFA1A, code => 0x8ff4d7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A'},
	 {direction => BOTH, ucs => 0xFA1B, code => 0x8ff4d8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B'},
	 {direction => BOTH, ucs => 0xFA1C, code => 0x8ff4da, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C'},
	 {direction => BOTH, ucs => 0xFA1D, code => 0x8ff4db, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D'},
	 {direction => BOTH, ucs => 0xFA1E, code => 0x8ff4de, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E'},
	 {direction => BOTH, ucs => 0xFA1F, code => 0x8ff4e0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F'},
	 {direction => BOTH, ucs => 0xFA20, code => 0x8ff4e2, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20'},
	 {direction => BOTH, ucs => 0xFA21, code => 0x8ff4e3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21'},
	 {direction => BOTH, ucs => 0xFA22, code => 0x8ff4e5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22'},
	 {direction => BOTH, ucs => 0xFA23, code => 0x8ff4e9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23'},
	 {direction => BOTH, ucs => 0xFA24, code => 0x8ff4ea, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24'},
	 {direction => BOTH, ucs => 0xFA25, code => 0x8ff4eb, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25'},
	 {direction => BOTH, ucs => 0xFA26, code => 0x8ff4ed, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26'},
	 {direction => BOTH, ucs => 0xFA27, code => 0x8ff4ef, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27'},
	 {direction => BOTH, ucs => 0xFA28, code => 0x8ff4f0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28'},
	 {direction => BOTH, ucs => 0xFA29, code => 0x8ff4f3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29'},
	 {direction => BOTH, ucs => 0xFA2A, code => 0x8ff4f7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A'},
	 {direction => BOTH, ucs => 0xFA2B, code => 0x8ff4f8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B'},
	 {direction => BOTH, ucs => 0xFA2C, code => 0x8ff4f9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C'},
	 {direction => BOTH, ucs => 0xFA2D, code => 0x8ff4fd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D'},
	 {direction => BOTH, ucs => 0xFF07, code => 0x8ff4a9, comment => '# FULLWIDTH APOSTROPHE'},
	 {direction => BOTH, ucs => 0xFFE4, code => 0x8fa2c3, comment => '# FULLWIDTH BROKEN BAR'},

	 # additional conversions for EUC_JP -> UTF-8 conversion
	 {direction => TO_UNICODE, ucs => 0x2116, code => 0x8ff4ac, comment => '# NUMERO SIGN'},
	 {direction => TO_UNICODE, ucs => 0x2121, code => 0x8ff4ad, comment => '# TELEPHONE SIGN'},
	 {direction => TO_UNICODE, ucs => 0x3231, code => 0x8ff4ab, comment => '# PARENTHESIZED IDEOGRAPH STOCK'}
	);

print_conversion_tables($this_script, "EUC_JP", \@mapping);


#######################################################################
# sjis2jis ; SJIS => JIS conversion
sub sjis2jis
{
	my ($sjis) = @_;

	return $sjis if ($sjis <= 0x100);

	my $hi = $sjis >> 8;
	my $lo = $sjis & 0xff;

	if ($lo >= 0x80) { $lo--; }
	$lo -= 0x40;
	if ($hi >= 0xe0) { $hi -= 0x40; }
	$hi -= 0x81;
	my $pos = $lo + $hi * 0xbc;

	if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
	{
		# This region (115-ku) is out of range of JIS code but for
		# convenient to generate code in EUC CODESET 3, move this to
		# seemingly duplicate region (83-84-ku).
		$pos = $pos - ((31 * 0x5e) + 12);

		# after 85-ku 82-ten needs to be moved 2 codepoints
		$pos = $pos - 2 if ($pos >= 84 * 0x5c + 82);
	}

	my $hi2 = $pos / 0x5e;
	my $lo2 = ($pos % 0x5e);

	my $ret = $lo2 + 0x21 + (($hi2 + 0x21) << 8);

	return $ret;
}
